0: Preparation

Defining the input and output files

# Clean the working environment
rm(list = ls())

knitr::opts_chunk$set(echo = TRUE)


####################################  
# Defining the input files
#################################### 

Selection_strength_test_dir <- Sys.getenv("Selection_strength_test_dir")

Sweep_test_dir <-  Sys.getenv("Sweep_test_dir")

############### 
## Empirical ###
###############

### ROH hotspots ###
Empirical_data_ROH_hotspots_dir  <- Sys.getenv("Empirical_data_ROH_hotspots_dir")
Empirical_data_autosome_ROH_freq_dir <- Sys.getenv("Empirical_data_autosome_ROH_freq_dir")
### Inbreeding coefficient ###

Empirical_data_F_ROH_dir  <- Sys.getenv("Empirical_data_F_ROH_dir")

### Expected Heterozygosity distribution ###
Empirical_data_H_e_dir <- Sys.getenv("Empirical_data_H_e_dir")

############### 
## Simulated ###
###############

### Causative variant ###

Selection_causative_variant_windows_dir <- Sys.getenv("Selection_causative_variant_windows_dir")
variant_freq_plots_dir <- Sys.getenv("variant_freq_plots_dir")
variant_position_dir <- Sys.getenv("variant_position_dir")



### ROH hotspots ###
Neutral_model_ROH_hotspots_dir <- Sys.getenv("Neutral_model_ROH_hotspots_dir")
Neutral_model_autosome_ROH_freq_dir <- Sys.getenv("Neutral_model_autosome_ROH_freq_dir")

Selection_model_ROH_hotspots_dir  <- Sys.getenv("Selection_model_ROH_hotspots_dir")
Selection_model_autosome_ROH_freq_dir <- Sys.getenv("Selection_model_autosome_ROH_freq_dir")

### Inbreeding coefficient ###
Neutral_model_F_ROH_dir  <- Sys.getenv("Neutral_model_F_ROH_dir")
Selection_model_F_ROH_dir  <- Sys.getenv("Selection_model_F_ROH_dir")

### Expected Heterozygosity distribution ###
Neutral_model_H_e_dir <- Sys.getenv("Neutral_model_H_e_dir")
Selection_model_H_e_dir <- Sys.getenv("Selection_model_H_e_dir")


histogram_line_sizes <- 3
empirical_data_color <- "darkgreen"
neutral_model_color <- "blue" 
selection_model_color <- "purple"


# Sys.getenv()  

# # Set the working directory for notebook chunks
# knitr::opts_knit$set(root.dir = output_dir_sweep_test)

Loading libraries

library(knitr)
## Warning: package 'knitr' was built under R version 4.3.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.1

Causative variant

Fixation time

# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
  # Find the index of the first row where the allele frequency is 0.9 or higher
  fixation_index <- which(data$allele_frequency >= threshold)[1]
  
  # Return the number of rows until fixation is reached
  return(fixation_index - 1)
}
Selection_coefficient Avg_Fixation_time Min_fixation_time Max_fixation_time
s02_chr3 36.2 34 38
s03_chr3 26.0 18 33
s04_chr3 22.8 16 32
s05_chr3 18.2 11 23
s06_chr3 11.6 9 15
s07_chr3 9.2 8 12
s08_chr3 8.2 7 9

Variant position

Window lengths

Causative variant Summary

Selection_coefficient Avg_Length_Mb Min_freq Max_freq Avg_window_freq Avg_freq_variant_100k_window
4 s05_chr3 4.86 0 0.06 0.0012857 0.000
3 s04_chr3 6.62 0 0.32 0.0475844 0.044
5 s06_chr3 7.08 0 0.36 0.0627515 0.052
7 s08_chr3 7.98 0 0.30 0.0558333 0.040
1 s02_chr3 8.54 0 0.38 0.1229262 0.116
2 s03_chr3 8.58 0 0.76 0.1981140 0.196
6 s07_chr3 9.74 0 0.10 0.0047496 0.000

Bootstrap confidence interval function

Confidence level <=> konfidensgrad

# Function to calculate bootstrap confidence intervals
bootstrap_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
  
  # Function to calculate the mean for each bootstrap sample
  compute_bootstrap_mean_fun <- function(observed_data_urn) {
    bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
    bootstrap_estimate <- mean(bootstrap_dataset)
    return(bootstrap_estimate)
  }
  
  # Perform bootstrap resampling
  bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
  
  # Calculate the percentiles for the confidence interval
  alpha <- (1 - confidence_level) / 2
  confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
  confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
  
  # Return the confidence interval
  return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}

1: ROH-Hotspots

1.1 : Autosome ROH-frequencies

1.1.1 : Empirical - ROH frequency

1.1.2: Neutral Model - ROH frequency

1.1.3: Selection Model

1.4 ROH-hotspot threshold summary

## ROH-hotspot selection testing results://n
Model Avg_ROH_hotspot_threshold
Neutral 0.284
selection_model_s03_chr3 0.304
selection_model_s02_chr3 0.336
selection_model_s07_chr3 0.336
selection_model_s05_chr3 0.352
selection_model_s06_chr3 0.368
selection_model_s08_chr3 0.380
selection_model_s04_chr3 0.404
Empirical 0.750

2: Inbreeding coefficient

2.1 Empirical data

## Overall Average Avg_F_ROH for  german_shepherd : 0.2753012 //n

2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.2325104 //n
## [1] "Bootstrap 95% Confidence Interval: [0.209087812, 0.2611282]"

2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for  selection_model_s02_chr3 : 0.2758382 //n[1] "Bootstrap 95% Confidence Interval: [0.25748676, 0.29418964]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s03_chr3 : 0.2542621 //n[1] "Bootstrap 95% Confidence Interval: [0.23613044, 0.268683734]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s04_chr3 : 0.3137271 //n[1] "Bootstrap 95% Confidence Interval: [0.27509544, 0.35235872]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s05_chr3 : 0.2852615 //n[1] "Bootstrap 95% Confidence Interval: [0.271808452, 0.30210684]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s06_chr3 : 0.291331 //n[1] "Bootstrap 95% Confidence Interval: [0.23433544, 0.34739656]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s07_chr3 : 0.2620261 //n[1] "Bootstrap 95% Confidence Interval: [0.22305532, 0.31074256]"

## Point estimate F_ROH across all 20 simulations for  selection_model_s08_chr3 : 0.3038106 //n[1] "Bootstrap 95% Confidence Interval: [0.2790912, 0.32875364]"

2.4 F_ROH summary

Model F_ROH Lower_CI Upper_CI
Neutral 0.23251 0.20909 0.26113
s03 0.25426 0.23613 0.26868
s07 0.26203 0.22306 0.31074
Empirical 0.27530 NA NA
s02 0.27584 0.25749 0.29419
s05 0.28526 0.27181 0.30211
s06 0.29133 0.23434 0.34740
s08 0.30381 0.27909 0.32875
s04 0.31373 0.27510 0.35236

2.4.1 Visualizaing F_ROH

## Models where empirical F_ROH is within CI:
## [1] "s02" "s04" "s05" "s06" "s07"
## 
## Models where empirical F_ROH is outside CI:
## [1] "s03"     "s08"     "Neutral"

3: Expected Heterozygosity

3.1 Empirical data

3.2 Neutral Model

3.3 Selection Model

3.4 Expected Heterozygosity Summary

Model H_e_5th_percentile Lower_CI Upper_CI
s05 0.14698 0.13360 0.16037
s04 0.14930 0.13344 0.17414
s07 0.16899 0.14625 0.19107
s02 0.17367 0.15716 0.19021
s03 0.17539 0.15927 0.19548
s08 0.17670 0.16704 0.18635
Neutral 0.17952 0.16704 0.19200
s06 0.18277 0.16560 0.19994
Empirical NA NA NA

4: Summary

4.0 General comparison

4.0.1 ROH-hotspot threshold comparison

## 
##  ROH-hotspot threshold comparison between the different datasets
Model Avg_ROH_hotspot_threshold
Neutral 0.284
selection_model_s03_chr3 0.304
selection_model_s02_chr3 0.336
selection_model_s07_chr3 0.336
selection_model_s05_chr3 0.352
selection_model_s06_chr3 0.368
selection_model_s08_chr3 0.380
selection_model_s04_chr3 0.404
Empirical 0.750

4.0.1 F_ROH comparison

Model F_ROH Lower_CI Upper_CI
Neutral 0.23251 0.20909 0.26113
s03 0.25426 0.23613 0.26868
s07 0.26203 0.22306 0.31074
Empirical 0.27530 NA NA
s02 0.27584 0.25749 0.29419
s05 0.28526 0.27181 0.30211
s06 0.29133 0.23434 0.34740
s08 0.30381 0.27909 0.32875
s04 0.31373 0.27510 0.35236

4.1: Hotspot comparison

4.1.1: Selection test (Sweep test)

## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the fifth percentile of the neutral models average H_e are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0.1638"
Name Under_selection Window_based_Average_H_e
Hotspot_chr3_window_1 Yes 0.1082216
Hotspot_chr3_window_3 Yes 0.1230482
Hotspot_chr3_window_2 Yes 0.1445314
Hotspot_chr17_window_1 Yes 0.1449621
Hotspot_chr17_window_2 Yes 0.1486567
Hotspot_chr19_window_1 Yes 0.1603723
## [1] "ROH-hotspots under selection:"
Name Under_selection Window_based_Average_H_e
Hotspot_chr3_window_1 Yes 0.1082216
Hotspot_chr3_window_3 Yes 0.1230482
Hotspot_chr3_window_2 Yes 0.1445314
Hotspot_chr17_window_1 Yes 0.1449621
Hotspot_chr17_window_2 Yes 0.1486567
Hotspot_chr19_window_1 Yes 0.1603723

4.1.2: Selection Strength Test (Sweep test)

4.1.1.1 Extracting Hotspots under selection

5 Visualizing Expected Heterozygoisty

5.1 Bootstrap CI for mean 5th percentile H_e

## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## [1] "s02"     "s03"     "s04"     "s05"     "s06"     "s07"     "s08"    
## [8] "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## character(0)
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## [1] "s02"     "s03"     "s04"     "s05"     "s06"     "s07"     "s08"    
## [8] "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## [1] "s04" "s05"
## 
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## [1] "s02"     "s03"     "s06"     "s07"     "s08"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## [1] "s04" "s05"
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## [1] "s02"     "s03"     "s06"     "s07"     "s08"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## [1] "s04" "s05" "s07"
## 
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## [1] "s02"     "s03"     "s06"     "s08"     "Neutral"

## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## [1] "s02" "s03" "s04" "s07"
## 
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## [1] "s05"     "s06"     "s08"     "Neutral"

5.2 5th percentile of the extreme H_e values

Selection_Coefficient H_e_Threshold
s02 0.1472
s03 0.1472
s04 0.1128
s05 0.1302
s06 0.1555
s07 0.1302
## 
## Hotspot Name: Hotspot_chr3_window_1

## 
## Hotspot Name: Hotspot_chr3_window_3

## 
## Hotspot Name: Hotspot_chr3_window_2

## 
## Hotspot Name: Hotspot_chr17_window_1

## 
## Hotspot Name: Hotspot_chr17_window_2

## 
## Hotspot Name: Hotspot_chr19_window_1